import ddgs.cli
from ddgs import DDGS
import trafilatura


def extract_main_content(url):
    """Download a web page and return its main content.

    Args:
        url: Address of the page to fetch.

    Returns:
        The value produced by ``trafilatura.extract`` when it is truthy;
        ``None`` when the download returned nothing, the extraction was
        empty, or any exception was raised during processing.
    """
    try:
        # Fetch the page; trafilatura signals a failed download with None.
        page = trafilatura.fetch_url(url)
        if page is None:
            print(f"无法访问URL: {url}")
            return None

        # Pull the main content out of the downloaded page.
        text = trafilatura.extract(page)
        if not text:
            print("❌ 未能提取到有效内容")
            return None

        print(f"✅ 成功提取内容，长度: {len(text)} 字符")
        return text
    except Exception as exc:
        # Best-effort helper: report the failure and signal it with None.
        print(f"处理过程中出错: {str(exc)}")
        return None


# Usage example: run a text search, then print the main content of each hit.
if __name__ == "__main__":
    search_hits = DDGS().text(query="math", region='cn-zh', max_results=5)
    print(search_hits)

    for hit in search_hits:
        page_text = extract_main_content(hit["href"])
        # Skip hits whose page could not be fetched or yielded no content.
        if not page_text:
            continue
        print("=========================")
        print(page_text)

